import re
import os
import yaml
import pandas as pd

def extract_first_substring(input_string):
    """Return the first ``[...]`` group found in *input_string*.

    The returned text includes the surrounding brackets. Matching is
    non-greedy, so the group ends at the first closing bracket that
    follows the opening one. Returns ``None`` when the string contains
    no bracketed group.
    """
    found = re.search(r'\[.*?\]', input_string)
    return found.group() if found else None
    
def extract_numbers(input_string):
    """Parse the first ``[<start>..<end>]`` range in *input_string*.

    Returns a ``(start, end)`` tuple of ints, or ``None`` when the string
    contains no bracketed pair of non-negative integers separated by
    two dots.
    """
    found = re.search(r'\[(\d+)\.\.(\d+)\]', input_string)
    if found is None:
        return None
    low, high = (int(digits) for digits in found.groups())
    return low, high
    
def extract_area(path):
    """Build a per-component area table from an ART summary YAML file.

    The file at *path* must contain a mapping with an
    ``ART_summary -> table_summary`` list whose entries each provide a
    ``name`` and an ``area``.

    For every entry:

    * ``components`` is the text after the last ``.`` in ``name``.
    * ``count`` is ``1`` plus the upper bound of the first ``[a..b]``
      range found in ``name``, or ``1`` when there is no such range.
    * ``total_area`` is ``area * count``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``components``, ``area``,
        ``count`` and ``total_area`` (in that order), one row per entry.

    Raises:
        KeyError: if the expected keys are missing from the YAML data.
    """
    # NOTE(review): FullLoader accepts more than plain data tags. If *path*
    # can ever come from an untrusted source, switch to yaml.safe_load.
    with open(path, 'r') as yaml_file:
        art_data = yaml.load(yaml_file, Loader=yaml.FullLoader)

    components = []
    areas = []
    counts = []
    for entry in art_data['ART_summary']['table_summary']:
        name = entry['name']
        # NOTE(review): the range itself contains dots, so a name that ends
        # in "[a..b]" yields a fragment such as "167]" here.
        components.append(name.split('.')[-1])

        count = 1
        bracketed = extract_first_substring(name)
        if bracketed is not None:
            bounds = extract_numbers(bracketed)
            # A bracket that is not an "[a..b]" range (e.g. "[3]") has no
            # bounds to unpack; count such a component as a single instance
            # instead of failing.
            if bounds is not None:
                _, max_val = bounds
                # NOTE(review): 1 + upper bound equals the instance count
                # only if ranges start at 0 - confirm against the producer.
                count += max_val
        counts.append(count)
        areas.append(entry['area'])

    art_df = pd.DataFrame(
        {"components": components, "area": areas, "count": counts}
    )
    art_df["total_area"] = art_df["area"] * art_df["count"]

    return art_df

# Section headers are recognised for the numbers 0 through 5 only.
_SECTION_NUMBER_LIMIT = 6
# The last section of each kind has no following header to bound it, so a
# fixed number of lines is read starting at its header.
_LAST_LEVEL_WINDOW = 80
_LAST_NETWORK_WINDOW = 65
# Energy values are parsed only from this many leading network sections.
_ENERGY_NETWORK_SECTIONS = 2
# A line containing this rule is followed by the network's name.
_NETWORK_NAME_RULE = "---------"
_COMPONENT_NAME_RE = re.compile(r'=== (.+?) ===')
_ENERGY_TOTAL_RE = re.compile(r'  Energy\s*\(total\)\s*:\s*(\d+\.\d+)\s*pJ')


def _section_starts(lines, label):
    """Return the index of each line containing ``"<label> <n>"``, n in 0..5."""
    return [
        idx
        for idx, line in enumerate(lines)
        for number in range(_SECTION_NUMBER_LIMIT)
        if f"{label} {number}" in line
    ]


def _section_spans(starts, tail_window):
    """Pair each start with the next one; the last spans *tail_window* lines."""
    spans = list(zip(starts, starts[1:]))
    spans.append((starts[-1], starts[-1] + tail_window))
    return spans


def extract_energy(path):
    """Collect component names and total energies from a text report.

    The file at *path* is read as plain text (presumably a Timeloop-style
    stats report - TODO confirm) and parsed as follows:

    * Level sections start at lines containing ``Level 0`` .. ``Level 5``.
      From the first such line up to 80 lines past the last one, every
      ``=== <name> ===`` line contributes a component name and every
      ``Energy (total) : <x.y> pJ`` line contributes an energy value.
      Values are reported exactly as parsed; nothing is aggregated.
    * Network sections start at lines containing ``Network 0`` ..
      ``Network 5``; the last one spans 65 lines. Inside a section, the
      line following any line that contains nine dashes is taken as a
      network name (trailing whitespace removed).
    * Network energies are the non-zero ``Energy (total)`` values found in
      the first two network sections, followed by one ``0.0`` for every
      network name beyond the second.

    Returns:
        A ``pandas.DataFrame`` with the columns ``components`` (level
        names followed by network names) and ``energy`` (floats, in pJ as
        written in the report).

    Raises:
        IndexError: if no level header is found, fewer than three network
            headers are found, or a dashed rule is the last line of its
            section.
        ValueError: raised by pandas when the number of names differs from
            the number of energy values.
    """
    with open(path, 'r') as stats_file:
        lines = stats_file.readlines()

    level_starts = _section_starts(lines, "Level")
    network_starts = _section_starts(lines, "Network")
    if not level_starts:
        raise IndexError(f"no 'Level <n>' header found in {path!r}")
    if len(network_starts) <= _ENERGY_NETWORK_SECTIONS:
        raise IndexError(
            f"expected at least {_ENERGY_NETWORK_SECTIONS + 1} "
            f"'Network <n>' headers in {path!r}, "
            f"found {len(network_starts)}"
        )

    # Header indices are in file order, so the per-section slices are
    # contiguous and can be scanned as one window.
    level_names = []
    level_energy = []
    level_stop = level_starts[-1] + _LAST_LEVEL_WINDOW
    for line in lines[level_starts[0]:level_stop]:
        name_match = _COMPONENT_NAME_RE.search(line)
        if name_match:
            level_names.append(name_match.group(1))
        energy_match = _ENERGY_TOTAL_RE.search(line)
        if energy_match:
            level_energy.append(float(energy_match.group(1)))

    # Names are looked up section by section so that the "next line" never
    # crosses into the following section.
    network_names = []
    for start, stop in _section_spans(network_starts, _LAST_NETWORK_WINDOW):
        section = lines[start:stop]
        for pos, line in enumerate(section):
            if _NETWORK_NAME_RULE in line:
                network_names.append(section[pos + 1].rstrip())

    network_energy = []
    energy_stop = network_starts[_ENERGY_NETWORK_SECTIONS]
    for line in lines[network_starts[0]:energy_stop]:
        energy_match = _ENERGY_TOTAL_RE.search(line)
        if energy_match:
            value = float(energy_match.group(1))
            if value != 0.0:
                network_energy.append(value)
    # Networks beyond the leading ones are reported with zero energy.
    network_energy.extend(
        [0.0] * (len(network_names) - _ENERGY_NETWORK_SECTIONS)
    )

    # NOTE(review): the two columns only line up when each level yields one
    # energy line and each leading network yields one non-zero energy.
    energy_df = pd.DataFrame()
    energy_df['components'] = level_names + network_names
    energy_df['energy'] = level_energy + network_energy

    return energy_df

def extract_cycles(path):
    """Return the cycle count listed under "Summary Stats" in a text report.

    The file at *path* is read as plain text. The last line containing
    ``Summary Stats`` marks where the summary begins. From there on, each
    line containing ``Cycles:`` is parsed as an integer taken from the text
    after its last colon, and the last such value is returned.

    Returns:
        The cycle count as an ``int``.

    Raises:
        ValueError: if the file has no ``Summary Stats`` line, no
            ``Cycles:`` line after it, or a ``Cycles:`` value that is not
            an integer.
    """
    with open(path, 'r') as stats_file:
        lines = stats_file.readlines()

    summary_idx = None
    for idx, line in enumerate(lines):
        if "Summary Stats" in line:
            # Keep scanning: the last occurrence wins.
            summary_idx = idx
    if summary_idx is None:
        raise ValueError(f"no 'Summary Stats' section found in {path!r}")

    cycles = None
    for line in lines[summary_idx:]:
        if "Cycles:" in line:
            cycles = int(line.split(':')[-1].strip())
    if cycles is None:
        raise ValueError(
            f"no 'Cycles:' entry found after 'Summary Stats' in {path!r}"
        )

    return cycles
